us <- read.csv("https://raw.githubusercontent.com/JaclynCoate/6373_Time_Series/master/TermProject/Data/USdaily.7.26.20.csv", header = T, strip.white = T)
us <- transform(us, date = as.Date(as.character(date), "%Y%m%d"))
us <- subset(us, select = -c(states, dateChecked, hospitalized, lastModified, total, posNeg, totalTestResultsIncrease, hash))
us[is.na(us)] <- 0
us = us[order(as.Date(us$date, format = "%Y%m%d")),]
head(us)
## date positive negative pending hospitalizedCurrently
## 187 2020-01-22 2 0 0 0
## 186 2020-01-23 2 0 0 0
## 185 2020-01-24 2 0 0 0
## 184 2020-01-25 2 0 0 0
## 183 2020-01-26 2 0 0 0
## 182 2020-01-27 2 0 0 0
## hospitalizedCumulative inIcuCurrently inIcuCumulative
## 187 0 0 0
## 186 0 0 0
## 185 0 0 0
## 184 0 0 0
## 183 0 0 0
## 182 0 0 0
## onVentilatorCurrently onVentilatorCumulative recovered death
## 187 0 0 0 0
## 186 0 0 0 0
## 185 0 0 0 0
## 184 0 0 0 0
## 183 0 0 0 0
## 182 0 0 0 0
## totalTestResults deathIncrease hospitalizedIncrease negativeIncrease
## 187 2 0 0 0
## 186 2 0 0 0
## 185 2 0 0 0
## 184 2 0 0 0
## 183 2 0 0 0
## 182 2 0 0 0
## positiveIncrease
## 187 0
## 186 0
## 185 0
## 184 0
## 183 0
## 182 0
Traits:
ggplot(data = us, aes(x=date, y=hospitalizedCurrently))+
geom_line(color="orange")+
labs(title = "Current COVID Hospitalized Cases US", y = "Thousands", x = "") +
theme_fivethirtyeight()
It is difficult to assume stationarity for this data due to multiple factors. We are working under the assumption that COVID is a novel virus and cases as well as hospitalizations will eventually return to zero. This being said our current modeling techniques do things such as return to the median or mimic the previously seen trends. Also, we see a heavy wandering trend in both new cases and hospitalization would be dependent on this as well as time. We will review the data and see what, if any, non-stationary components reveal themselves and model the data accordingly.
It is difficult to assume stationarity for this data due to multiple factors. We are working under the assumption that COVID is a novel virus and cases as well as hospitalizations will eventually return to zero. This being said our current modeling techniques do things such as return to the median or mimic the previously seen trends. Also, we see a heavy wandering trend in both new cases and hospitalization would be dependent on this as well as time. We will review the data and see what, if any, non-stationary components reveal themselves and model the data accordingly.
When reviewing the variables and the scatter plot from our previous EDA we can see that there are some correlations that were expected, for example currentHospitalizations is correlated with the variables that refelct ICU and Ventilaor patients. These metrics wouldn’t exist without hospitalizations. These variables also cannot help up predict hospitalizations because they after occurances. So we will actually be leveraing variables such as positive and positive increase in order to see if there is some sort of correlation between hospitalized patients and the number of positive cases.
ggplot(data = us, aes(x=date, y=positive))+
geom_line(color="orange")+
labs(title = "Total Cases US", y = "Millions", x = "") +
theme_fivethirtyeight()
ggplot(data = us, aes(x=date, y=positiveIncrease))+
geom_line(color="orange")+
labs(title = "Increase in COVID Cases US", y = "Thousands", x = "") +
theme_fivethirtyeight()
#a
plotts.sample.wge(us$positiveIncrease)
## $autplt
## [1] 1.0000000 0.9740745 0.9439116 0.9109437 0.8869924 0.8711719 0.8605261
## [8] 0.8462935 0.8144161 0.7772852 0.7356766 0.7045767 0.6829609 0.6646632
## [15] 0.6432819 0.6088202 0.5688872 0.5293417 0.5001353 0.4761589 0.4592724
## [22] 0.4426271 0.4148783 0.3789333 0.3458221 0.3182895
##
## $freq
## [1] 0.005347594 0.010695187 0.016042781 0.021390374 0.026737968
## [6] 0.032085561 0.037433155 0.042780749 0.048128342 0.053475936
## [11] 0.058823529 0.064171123 0.069518717 0.074866310 0.080213904
## [16] 0.085561497 0.090909091 0.096256684 0.101604278 0.106951872
## [21] 0.112299465 0.117647059 0.122994652 0.128342246 0.133689840
## [26] 0.139037433 0.144385027 0.149732620 0.155080214 0.160427807
## [31] 0.165775401 0.171122995 0.176470588 0.181818182 0.187165775
## [36] 0.192513369 0.197860963 0.203208556 0.208556150 0.213903743
## [41] 0.219251337 0.224598930 0.229946524 0.235294118 0.240641711
## [46] 0.245989305 0.251336898 0.256684492 0.262032086 0.267379679
## [51] 0.272727273 0.278074866 0.283422460 0.288770053 0.294117647
## [56] 0.299465241 0.304812834 0.310160428 0.315508021 0.320855615
## [61] 0.326203209 0.331550802 0.336898396 0.342245989 0.347593583
## [66] 0.352941176 0.358288770 0.363636364 0.368983957 0.374331551
## [71] 0.379679144 0.385026738 0.390374332 0.395721925 0.401069519
## [76] 0.406417112 0.411764706 0.417112299 0.422459893 0.427807487
## [81] 0.433155080 0.438502674 0.443850267 0.449197861 0.454545455
## [86] 0.459893048 0.465240642 0.470588235 0.475935829 0.481283422
## [91] 0.486631016 0.491978610 0.497326203
##
## $db
## [1] 14.3997611 15.8955909 7.7642205 8.7050186 3.8198773
## [6] -1.1863576 2.6815084 -0.4667544 -1.0582274 -3.9729372
## [11] -4.4388886 -5.3739230 -7.0574666 -3.9001118 -5.3732547
## [16] -5.9203161 -6.0265835 -6.3073466 -8.7427682 -8.5315257
## [21] -8.1694048 -8.8726978 -6.2228751 -5.6255282 -5.7869081
## [26] -2.3190255 -0.6895940 -18.2510780 -12.7252753 -17.9895004
## [31] -13.1495229 -17.1809062 -12.9879824 -18.5885318 -14.0950716
## [36] -19.2823521 -16.1802599 -22.2193540 -17.1778910 -15.9864725
## [41] -13.8720977 -14.3606703 -12.3482825 -19.0159734 -12.8299009
## [46] -16.1634117 -19.2402721 -16.9940391 -19.9193164 -24.4462952
## [51] -12.2184745 -14.1347405 -11.1407485 -19.5608640 -22.3406178
## [56] -26.2948840 -15.9219942 -19.7896219 -16.3429699 -16.6545137
## [61] -13.2991307 -20.9465604 -12.5392646 -23.3015328 -21.5282804
## [66] -17.4748203 -26.6390880 -24.5923917 -17.5647166 -19.7282494
## [71] -17.6845420 -23.0825883 -19.0743556 -19.4958696 -19.5138333
## [76] -20.4270518 -18.7343253 -16.8982195 -14.1306252 -14.2054286
## [81] -15.0466299 -13.4474119 -14.7662082 -15.2848488 -14.7767269
## [86] -17.2543647 -17.5524024 -16.7680943 -23.6931393 -19.6888637
## [91] -22.9328710 -17.1805763 -15.7422487
##
## $dbz
## [1] 12.1744244 11.8068871 11.1912920 10.3234897 9.1987290
## [6] 7.8134517 6.1690554 4.2794940 2.1855400 -0.0227779
## [11] -2.1856123 -4.0930116 -5.5814407 -6.6274820 -7.3078903
## [16] -7.6988921 -7.8526856 -7.8271468 -7.6919809 -7.5061868
## [21] -7.3030888 -7.0977786 -6.9055053 -6.7545171 -6.6859448
## [26] -6.7448940 -6.9710578 -7.3933729 -8.0283548 -8.8797888
## [31] -9.9376266 -11.1745795 -12.5395304 -13.9486256 -15.2804168
## [36] -16.3913300 -17.1665125 -17.5812779 -17.7097183 -17.6695854
## [41] -17.5637030 -17.4562894 -17.3753921 -17.3239844 -17.2913960
## [46] -17.2628022 -17.2262162 -17.1767997 -17.1186074 -17.0640034
## [51] -17.0309217 -17.0382729 -17.1002705 -17.2209453 -17.3905128
## [56] -17.5857017 -17.7761429 -17.9367144 -18.0606665 -18.1649087
## [61] -18.2832201 -18.4521578 -18.6979308 -19.0286543 -19.4319317
## [66] -19.8765383 -20.3176913 -20.7053075 -20.9929698 -21.1442791
## [71] -21.1356798 -20.9585908 -20.6228760 -20.1586258 -19.6118560
## [76] -19.0349178 -18.4769322 -17.9782467 -17.5690679 -17.2703544
## [81] -17.0951532 -17.0493949 -17.1318377 -17.3332302 -17.6350503
## [86] -18.0085946 -18.4157858 -18.8133902 -19.1613062 -19.4324915
## [91] -19.6189508 -19.7292633 -19.7788462
b. Differencing Data: much more stationary data and have surfaced a seasonaly component = 7 seen on ACF peaks at 7, 14, 21
#b
inpos.diff = artrans.wge(us$positiveIncrease, phi.tr = 1, lag.max = 100)
c. Seaonality Transformation: stationary data, and an ACF that reflects data other than whitenoise to be modeled.
#c
inpos.diff.seas = artrans.wge(inpos.diff,phi.tr = c(0,0,0,0,0,0,1), lag.max = 100)
d. Model IDing: diagnose with aic.wge to determine phi, thetas: Both AIC/BIC select ARMA(4,2)
#d
aic5.wge(inpos.diff.seas)
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of aic
## p q aic
## 15 4 2 15.62368
## 9 2 2 15.74701
## 18 5 2 15.74771
## 12 3 2 15.75351
## 7 2 0 15.75551
aic5.wge(inpos.diff.seas, type = "bic")
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of bic
## p q bic
## 15 4 2 15.74832
## 2 0 1 15.79201
## 7 2 0 15.80893
## 4 1 0 15.81457
## 3 0 2 15.81522
e. White Noise Test: Reject the H_null and accept this data is not white noise
#e
acf(inpos.diff.seas)
ljung.wge(inpos.diff.seas)$pval
## Obs -0.3711871 -0.02127283 0.07854862 0.05194601 -0.1044838 0.3276797 -0.4658847 0.1565306 0.1284186 -0.1060791 -0.07693327 0.1030723 -0.06391648 0.05002847 0.004471288 -0.03640442 -0.01551996 0.04133725 -0.01110058 -0.08153895 0.06570572 -0.03709739 -0.02390062 0.04588417
## [1] 1.246447e-12
f. Estiamte Phis, Thetas
g. Model Building
#f
est.inpos.seas = est.arma.wge(inpos.diff.seas, p = 4, q = 2)
##
## Coefficients of Original polynomial:
## -1.8727 -1.4974 -0.4735 0.0283
##
## Factor Roots Abs Recip System Freq
## 1+1.1293B+0.6983B^2 -0.8086+-0.8822i 0.8357 0.3681
## 1+0.7944B -1.2588 0.7944 0.5000
## 1-0.0510B 19.6244 0.0510 0.0000
##
##
mean(us$positiveIncrease)
## [1] 22567.12
#g
h. Forecast 7/90 Days
#7 day
inpos.preds7 = fore.aruma.wge(us$positiveIncrease, phi = est.inpos.seas$phi, theta = est.inpos.seas$theta, d=1, s=7, n.ahead = 7)
#90 day
inpos.preds90 = fore.aruma.wge(us$positiveIncrease, phi = est.inpos.seas$phi, theta = est.inpos.seas$theta, d=1, s=7, n.ahead = 90)
i. Plotting forecasts
#7 day forecast
plot(seq(1,187,1), us$positiveIncrease, type = "l", xlim = c(0,195), ylim = c(0,80000), ylab = "Increase in COVID Cases", main = "7 Day Increase in COVID Cases Forecast")
lines(seq(188, 194,1), inpos.preds7$f, type = "l", col = "red")
#90 day forecast
plot(seq(1,187,1), us$positiveIncrease, type = "l", xlim = c(0,280), ylim = c(0,80000), ylab = "Increase in COVID Cases", main = "90 Day Increase in COVID Cases Forecast")
lines(seq(188, 277,1), inpos.preds90$f, type = "l", col = "red")
#Selecting only those dates with reported current hospitilizations
us <- dplyr::slice(us,56:187)
invisible(us)
#Stationarize Currently Hospitalized Variable
us.diff = artrans.wge(us$hospitalizedCurrently, phi.tr = 1, lag.max = 100)
acf(us.diff)
us.diff.seas = artrans.wge(us.diff,phi.tr = c(0,0,0,0,0,0,1), lag.max = 100)
acf(us.diff.seas)
#Stationarize Increase Positive Variable
inpos.diff = artrans.wge(us$positiveIncrease, phi.tr = 1, lag.max = 100)
acf(inpos.diff)
inpos.diff.seas = artrans.wge(inpos.diff,phi.tr = c(0,0,0,0,0,0,1), lag.max = 100)
acf(inpos.diff.seas)
mlr.fit = lm(us.diff.seas~inpos.diff.seas, data=cbind(data.frame(us.diff.seas), data.frame(inpos.diff.seas)))
summary(mlr.fit)
##
## Call:
## lm(formula = us.diff.seas ~ inpos.diff.seas, data = cbind(data.frame(us.diff.seas),
## data.frame(inpos.diff.seas)))
##
## Residuals:
## Min 1Q Median 3Q Max
## -7149.3 -554.2 73.4 611.2 6506.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -13.32614 143.23299 -0.093 0.92603
## inpos.diff.seas 0.11733 0.04227 2.776 0.00637 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1595 on 122 degrees of freedom
## Multiple R-squared: 0.0594, Adjusted R-squared: 0.0517
## F-statistic: 7.705 on 1 and 122 DF, p-value: 0.006375
AIC(mlr.fit)
## [1] 2184.712
acf(mlr.fit$residuals)
plot(mlr.fit$residuals)
mlr.phis= aic.wge(mlr.fit$residuals)
mlr.phis
## $type
## [1] "aic"
##
## $value
## [1] 14.53403
##
## $p
## [1] 5
##
## $q
## [1] 2
##
## $phi
## [1] -0.2465069 -0.4219100 0.3119255 0.2116968 0.2024803
##
## $theta
## [1] -0.4657537 -0.9566014
##
## $vara
## [1] 1803071
mlr.fit.arima = arima(us$hospitalizedCurrently, order = c(5,1,2), seasonal = list(order = c(1,0,0), period = 7), xreg = us$positiveIncrease)
## Warning in log(s2): NaNs produced
AIC(mlr.fit.arima)
## [1] 2224.09
acf(mlr.fit.arima$resid)
ltest1 = ljung.wge(mlr.fit.arima$resid)
## Obs -0.006649913 -0.009334226 -0.01070918 0.01827832 -0.02771128 -0.01061308 -0.01123975 0.08781402 -0.05809279 0.07500195 0.05661161 -0.08527475 -0.007553036 0.1098068 0.002427647 -0.010962 -0.04242877 -0.001526285 -0.05612271 -0.03313627 0.02930685 -0.007307643 -0.06056842 -0.03342384
ltest1$pval
## [1] 0.999208
ltest2 = ljung.wge(mlr.fit.arima$resid, K= 48)
## Obs -0.006649913 -0.009334226 -0.01070918 0.01827832 -0.02771128 -0.01061308 -0.01123975 0.08781402 -0.05809279 0.07500195 0.05661161 -0.08527475 -0.007553036 0.1098068 0.002427647 -0.010962 -0.04242877 -0.001526285 -0.05612271 -0.03313627 0.02930685 -0.007307643 -0.06056842 -0.03342384 0.08407797 -0.006875273 -0.002312484 0.06777622 -0.09591675 -0.06198868 -0.01331578 -0.03441461 -0.04091228 0.0270524 0.06615092 0.06538511 -0.09261011 -0.04939741 -0.03187124 -0.04934637 -0.01834029 -0.02031817 -0.001873916 0.02337696 -0.004828854 0.006148926 -0.002455308 0.004810674
ltest2$pval
## [1] 0.9999866
#7 Day Case Increase
regs7 = data.frame(positiveIncrease = inpos.preds7$f)
invisible(regs7)
b. 90 Day
#90 Day Case Increase
regs90 = data.frame(positiveIncrease = inpos.preds90$f)
invisible(regs90)
mlr1.preds7 = predict(mlr.fit.arima, newxreg = regs7, n.ahead = 7)
invisible(mlr1.preds7)
b. 90 Day
mlr1.preds90 = predict(mlr.fit.arima, newxreg = regs90, n.ahead =90)
invisible(mlr1.preds90)
plot(seq(1,132,1), us$hospitalizedCurrently, type = "l", xlim = c(0,140), ylim = c(0,60000), ylab = "Currently Hospitalized COVID Cases", main = "7 Day Forecast for COVID Hospitalized Cases")
lines(seq(133,139,1), mlr1.preds7$pred, type = "l", col = "red")
b. 90 Day
plot(seq(1,132,1), us$hospitalizedCurrently, type = "l", xlim = c(0,223), ylim = c(0,60000), ylab = "Currently Hospitalized COVID Cases", main = "90 Day Forecast for COVID Hospitalized Cases")
lines(seq(133,222,1), mlr1.preds90$pred, type = "l", col = "red")
time <- seq(1,124,1)
mlr.fit.t = lm(us.diff.seas~inpos.diff.seas+time, data=cbind(data.frame(us.diff.seas), data.frame(inpos.diff.seas)))
summary(mlr.fit.t)
##
## Call:
## lm(formula = us.diff.seas ~ inpos.diff.seas + time, data = cbind(data.frame(us.diff.seas),
## data.frame(inpos.diff.seas)))
##
## Residuals:
## Min 1Q Median 3Q Max
## -7111.0 -524.5 73.9 617.7 6540.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.35112 289.27171 0.112 0.91114
## inpos.diff.seas 0.11724 0.04244 2.762 0.00663 **
## time -0.73096 4.01658 -0.182 0.85590
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1601 on 121 degrees of freedom
## Multiple R-squared: 0.05966, Adjusted R-squared: 0.04412
## F-statistic: 3.839 on 2 and 121 DF, p-value: 0.02419
AIC(mlr.fit.t)
## [1] 2186.678
acf(mlr.fit.t$residuals)
plot(mlr.fit.t$residuals)
mlr.phis= aic.wge(mlr.fit.t$residuals)
mlr.phis
## $type
## [1] "aic"
##
## $value
## [1] 14.53329
##
## $p
## [1] 5
##
## $q
## [1] 2
##
## $phi
## [1] -0.2466126 -0.4220817 0.3118538 0.2119376 0.2027949
##
## $theta
## [1] -0.4655288 -0.9564592
##
## $vara
## [1] 1801724
Time <- seq(1,132,1)
mlr.fit.arima.t = arima(us$hospitalizedCurrently, order = c(5,1,2), seasonal = list(order = c(1,0,0), period = 7), xreg = cbind(us$positiveIncrease, Time))
AIC(mlr.fit.arima.t)
## [1] 2230.756
acf(mlr.fit.arima.t$resid)
ltest1 = ljung.wge(mlr.fit.arima.t$resid)
## Obs 0.002267493 0.004743329 0.004085712 0.008584111 0.003613306 0.03592544 -0.02552583 0.08439651 -0.1060931 0.01276058 0.02640347 -0.06061388 0.05268146 0.1458814 0.008052735 -0.04255608 -0.0935536 -0.04208266 -0.0498013 -0.001717023 0.05934226 0.01341049 -0.07137289 -0.07279666
ltest1$pval
## [1] 0.9823161
ltest2 = ljung.wge(mlr.fit.arima.t$resid, K= 48)
## Obs 0.002267493 0.004743329 0.004085712 0.008584111 0.003613306 0.03592544 -0.02552583 0.08439651 -0.1060931 0.01276058 0.02640347 -0.06061388 0.05268146 0.1458814 0.008052735 -0.04255608 -0.0935536 -0.04208266 -0.0498013 -0.001717023 0.05934226 0.01341049 -0.07137289 -0.07279666 0.05320601 -0.01313386 0.01794322 0.1027134 -0.07595104 -0.06199621 -0.03306343 -0.06206906 -0.03944973 0.03988941 0.07572305 0.07297186 -0.1029145 -0.06857141 -0.04403521 -0.04289784 0.005428483 -0.003871671 -7.833593e-05 0.005208028 -0.03085214 -0.01036108 0.00645461 0.02868683
ltest2$pval
## [1] 0.9990337
#7 Day Case Increase
regs7t = data.frame(cbind(positiveIncrease = inpos.preds7$f, Time = seq(133,139)))
invisible(regs7)
b. 90 Day
#90 Day Case Increase
regs90t = data.frame(cbind(positiveIncrease = inpos.preds90$f, Time = seq(133,222)))
invisible(regs90)
mlr1.preds7.t = predict(mlr.fit.arima.t, newxreg = regs7t, n.ahead = 7)
invisible(mlr1.preds7.t)
b. 90 Day
mlr1.preds90.t = predict(mlr.fit.arima.t, newxreg = regs90t, n.ahead =90)
invisible(mlr1.preds90.t)
plot(seq(1,132,1), us$hospitalizedCurrently, type = "l", xlim = c(0,140), ylim = c(0,60000), ylab = "Currently Hospitalized COVID Cases", main = "7 Day Forecast for COVID Hospitalized Cases")
lines(seq(133,139,1), mlr1.preds7.t$pred, type = "l", col = "red")
b. 90 Day
plot(seq(1,132,1), us$hospitalizedCurrently, type = "l", xlim = c(0,223), ylim = c(0,85000), ylab = "Currently Hospitalized COVID Cases", main = "90 Day Forecast for COVID Hospitalized Cases")
lines(seq(133,222,1), mlr1.preds90.t$pred, type = "l", col = "red")
With a quick check, we can see that there is no lag correlationed between the increase of COVID patients and hospitalized patients, like we theorized there might be. So we will not model an MLR w/ correlated errors on lagged variables.
ccf(us$positiveIncrease,us$hospitalizedCurrently)
Since we are working with a seasonal and transformation component but it requires an additional transformation for the total positive COVID cases to become stationary for evaluation, we will only use positive increase of cases to predict currently hospitzliaed cases for the VAR model.
#Positive Cases Transformations
#pos.diff = artrans.wge(us$positive, phi.tr = 1, lag.max = 100)
#pos.diff.2 = artrans.wge(pos.diff, phi.tr = 1, lag.max = 100)
#pos.trans = artrans.wge(pos.diff.2,phi.tr = c(0,0,0,0,0,0,1), lag.max = 100)
#Increase Positive Transformation
inpos.diff = artrans.wge(us$positiveIncrease, phi.tr = 1, lag.max = 100)
inpos.trans = artrans.wge(inpos.diff,phi.tr = c(0,0,0,0,0,0,1), lag.max = 100)
#Current Hospitalization Transformations
us.diff = artrans.wge(us$hospitalizedCurrently, phi.tr = 1, lag.max = 100)
currhosp.trans = artrans.wge(us.diff,phi.tr = c(0,0,0,0,0,0,1), lag.max = 100)
#VARselect to choose lag
VARselect(cbind(currhosp.trans, inpos.trans), lag.max = 10, type= "both")
## $selection
## AIC(n) HQ(n) SC(n) FPE(n)
## 7 7 2 7
##
## $criteria
## 1 2 3 4 5
## AIC(n) 3.093857e+01 3.083329e+01 3.079091e+01 3.076976e+01 3.069554e+01
## HQ(n) 3.101650e+01 3.095018e+01 3.094676e+01 3.096458e+01 3.092932e+01
## SC(n) 3.113058e+01 3.112131e+01 3.117493e+01 3.124979e+01 3.127158e+01
## FPE(n) 2.731960e+13 2.459304e+13 2.357880e+13 2.309552e+13 2.145770e+13
## 6 7 8 9 10
## AIC(n) 3.066656e+01 3.051427e+01 3.053424e+01 3.059674e+01 3.059649e+01
## HQ(n) 3.093930e+01 3.082598e+01 3.088492e+01 3.098638e+01 3.102509e+01
## SC(n) 3.133860e+01 3.128233e+01 3.139830e+01 3.155681e+01 3.165257e+01
## FPE(n) 2.086403e+13 1.793906e+13 1.833019e+13 1.955149e+13 1.959499e+13
#fit model
usdiff.var.fit = VAR(cbind(currhosp.trans, inpos.trans), type = "both", p = 2)
#7 day predictions
pred.var7 = predict(usdiff.var.fit, n.ahead = 7)
pred.var7$fcst$currhosp.trans
## fcst lower upper CI
## [1,] -42.12350 -2942.410 2858.163 2900.286
## [2,] -385.17371 -3346.493 2576.145 2961.319
## [3,] -80.65766 -3262.529 3101.214 3181.871
## [4,] -124.14161 -3327.324 3079.041 3203.182
## [5,] -46.62794 -3284.538 3191.282 3237.910
## [6,] -43.58380 -3288.207 3201.039 3244.623
## [7,] -11.23201 -3262.277 3239.813 3251.045
b. 90 Day
#90 day predictions
pred.var90 = predict(usdiff.var.fit, n.ahead = 90)
pred.var90$fcst$currhosp.trans
## fcst lower upper CI
## [1,] -42.123501 -2942.410 2858.163 2900.286
## [2,] -385.173707 -3346.493 2576.145 2961.319
## [3,] -80.657665 -3262.529 3101.214 3181.871
## [4,] -124.141611 -3327.324 3079.041 3203.182
## [5,] -46.627942 -3284.538 3191.282 3237.910
## [6,] -43.583795 -3288.207 3201.039 3244.623
## [7,] -11.232008 -3262.277 3239.813 3251.045
## [8,] -6.349200 -3259.226 3246.528 3252.877
## [9,] 7.305513 -3246.870 3261.481 3254.175
## [10,] 12.421029 -3242.210 3267.053 3254.632
## [11,] 18.773716 -3236.131 3273.679 3254.905
## [12,] 22.480950 -3232.533 3277.495 3255.014
## [13,] 26.203504 -3228.870 3281.277 3255.073
## [14,] 28.923414 -3226.175 3284.022 3255.099
## [15,] 31.503265 -3223.608 3286.615 3255.112
## [16,] 33.699320 -3221.418 3288.817 3255.117
## [17,] 35.776726 -3219.344 3290.897 3255.120
## [18,] 37.697587 -3217.424 3292.819 3255.122
## [19,] 39.549962 -3215.572 3294.672 3255.122
## [20,] 41.334045 -3213.789 3296.457 3255.123
## [21,] 43.082251 -3212.041 3298.205 3255.123
## [22,] 44.799892 -3210.323 3299.923 3255.123
## [23,] 46.499486 -3208.623 3301.622 3255.123
## [24,] 48.185069 -3206.938 3303.308 3255.123
## [25,] 49.861824 -3205.261 3304.985 3255.123
## [26,] 51.532055 -3203.591 3306.655 3255.123
## [27,] 53.198022 -3201.925 3308.321 3255.123
## [28,] 54.860928 -3200.262 3309.984 3255.123
## [29,] 56.521787 -3198.601 3311.645 3255.123
## [30,] 58.181203 -3196.942 3313.304 3255.123
## [31,] 59.839642 -3195.283 3314.963 3255.123
## [32,] 61.497398 -3193.626 3316.620 3255.123
## [33,] 63.154688 -3191.968 3318.278 3255.123
## [34,] 64.811654 -3190.311 3319.935 3255.123
## [35,] 66.468399 -3188.655 3321.591 3255.123
## [36,] 68.124990 -3186.998 3323.248 3255.123
## [37,] 69.781476 -3185.341 3324.904 3255.123
## [38,] 71.437889 -3183.685 3326.561 3255.123
## [39,] 73.094252 -3182.029 3328.217 3255.123
## [40,] 74.750580 -3180.372 3329.874 3255.123
## [41,] 76.406884 -3178.716 3331.530 3255.123
## [42,] 78.063172 -3177.060 3333.186 3255.123
## [43,] 79.719449 -3175.404 3334.842 3255.123
## [44,] 81.375717 -3173.747 3336.499 3255.123
## [45,] 83.031981 -3172.091 3338.155 3255.123
## [46,] 84.688240 -3170.435 3339.811 3255.123
## [47,] 86.344498 -3168.778 3341.467 3255.123
## [48,] 88.000753 -3167.122 3343.124 3255.123
## [49,] 89.657007 -3165.466 3344.780 3255.123
## [50,] 91.313260 -3163.810 3346.436 3255.123
## [51,] 92.969513 -3162.153 3348.092 3255.123
## [52,] 94.625766 -3160.497 3349.749 3255.123
## [53,] 96.282018 -3158.841 3351.405 3255.123
## [54,] 97.938270 -3157.185 3353.061 3255.123
## [55,] 99.594521 -3155.528 3354.717 3255.123
## [56,] 101.250773 -3153.872 3356.374 3255.123
## [57,] 102.907025 -3152.216 3358.030 3255.123
## [58,] 104.563276 -3150.560 3359.686 3255.123
## [59,] 106.219528 -3148.903 3361.342 3255.123
## [60,] 107.875779 -3147.247 3362.999 3255.123
## [61,] 109.532031 -3145.591 3364.655 3255.123
## [62,] 111.188282 -3143.935 3366.311 3255.123
## [63,] 112.844534 -3142.278 3367.967 3255.123
## [64,] 114.500785 -3140.622 3369.624 3255.123
## [65,] 116.157037 -3138.966 3371.280 3255.123
## [66,] 117.813288 -3137.310 3372.936 3255.123
## [67,] 119.469540 -3135.653 3374.592 3255.123
## [68,] 121.125791 -3133.997 3376.249 3255.123
## [69,] 122.782043 -3132.341 3377.905 3255.123
## [70,] 124.438294 -3130.685 3379.561 3255.123
## [71,] 126.094546 -3129.028 3381.218 3255.123
## [72,] 127.750797 -3127.372 3382.874 3255.123
## [73,] 129.407049 -3125.716 3384.530 3255.123
## [74,] 131.063300 -3124.060 3386.186 3255.123
## [75,] 132.719552 -3122.403 3387.843 3255.123
## [76,] 134.375803 -3120.747 3389.499 3255.123
## [77,] 136.032055 -3119.091 3391.155 3255.123
## [78,] 137.688306 -3117.435 3392.811 3255.123
## [79,] 139.344558 -3115.778 3394.468 3255.123
## [80,] 141.000809 -3114.122 3396.124 3255.123
## [81,] 142.657061 -3112.466 3397.780 3255.123
## [82,] 144.313312 -3110.810 3399.436 3255.123
## [83,] 145.969564 -3109.153 3401.093 3255.123
## [84,] 147.625815 -3107.497 3402.749 3255.123
## [85,] 149.282067 -3105.841 3404.405 3255.123
## [86,] 150.938318 -3104.185 3406.061 3255.123
## [87,] 152.594570 -3102.528 3407.718 3255.123
## [88,] 154.250821 -3100.872 3409.374 3255.123
## [89,] 155.907073 -3099.216 3411.030 3255.123
## [90,] 157.563324 -3097.560 3412.686 3255.123
startingPoints7 = us$hospitalizedCurrently[126:132]
currHospForecasts7 = pred.var7$fcst$currhosp.trans[,1:3] + startingPoints7
b. 90 Day
startingPoints90 = us$hospitalizedCurrently[43:132]
currHospForecasts90 = pred.var90$fcst$currhosp.trans[,1:3] + startingPoints90
#7 day Forecasts
plot(seq(1,132,1), us$hospitalizedCurrently, type = "l", xlim = c(0,139), ylab = "Currently Hospitalized COVID Patients", main = "7 Day Currently Hospitalized Patients Forecast")
lines(seq(133,139,1), as.data.frame(currHospForecasts7)$fcst, type = "l", col = "red")
b. 90 Day
#90 day Forecasts
plot(seq(1,132,1), us$hospitalizedCurrently, type = "l", xlim = c(0,222), ylab = "Currently Hospitalized COVID Patients", main = "90 Day Currently Hospitalized Patients Forecast")
lines(seq(133,222,1), as.data.frame(currHospForecasts90)$fcst, type = "l", col = "red")
#varASE = mean((us$hospitalizedCurrently[123:132]-pred.var$fcst$y1[1:10])^2)
#varASE
Since we have been looking at additional regressors for all our above models and know that the best regressor to leverage will be positive increase of COVID cases. This regressor reflects similar behavior to that of current hospitlizations and can be model properly against hospitalizations. 1. Create Current Hospitalized ts variable
tUScurrhop = ts(us$hospitalizedCurrently)
tUSx = data.frame(positiveIncrease = ts(us$positiveIncrease))
fit.mlp.new = mlp(ts(tUSx), reps = 50, comb = "mean")
mlp.new.fore7 = forecast(fit.mlp.new, h = 7)
mlp.new.fore7
## Point Forecast
## 133 60074.86
## 134 63365.83
## 135 64993.45
## 136 66006.27
## 137 65300.01
## 138 64878.22
## 139 64482.41
b. 90 Day Forecast for Regressor
mlp.new.fore90 = forecast(fit.mlp.new, h = 90)
mlp.new.fore90
## Point Forecast
## 133 60074.86
## 134 63365.83
## 135 64993.45
## 136 66006.27
## 137 65300.01
## 138 64878.22
## 139 64482.41
## 140 64654.54
## 141 64770.89
## 142 64930.54
## 143 64913.16
## 144 64914.77
## 145 64875.07
## 146 64881.35
## 147 64871.10
## 148 64879.11
## 149 64873.34
## 150 64878.51
## 151 64879.68
## 152 64888.68
## 153 64891.00
## 154 64890.90
## 155 64883.58
## 156 64878.21
## 157 64875.23
## 158 64878.53
## 159 64883.63
## 160 64888.58
## 161 64889.10
## 162 64886.37
## 163 64881.84
## 164 64879.19
## 165 64879.10
## 166 64881.47
## 167 64884.09
## 168 64885.72
## 169 64885.44
## 170 64884.02
## 171 64882.31
## 172 64881.40
## 173 64881.46
## 174 64882.32
## 175 64883.30
## 176 64883.96
## 177 64883.96
## 178 64883.47
## 179 64882.77
## 180 64882.30
## 181 64882.24
## 182 64882.58
## 183 64883.05
## 184 64883.39
## 185 64883.42
## 186 64883.19
## 187 64882.84
## 188 64882.61
## 189 64882.60
## 190 64882.78
## 191 64883.00
## 192 64883.15
## 193 64883.15
## 194 64883.02
## 195 64882.87
## 196 64882.78
## 197 64882.79
## 198 64882.87
## 199 64882.97
## 200 64883.03
## 201 64883.02
## 202 64882.96
## 203 64882.90
## 204 64882.86
## 205 64882.87
## 206 64882.90
## 207 64882.95
## 208 64882.97
## 209 64882.97
## 210 64882.94
## 211 64882.91
## 212 64882.90
## 213 64882.90
## 214 64882.92
## 215 64882.94
## 216 64882.95
## 217 64882.95
## 218 64882.93
## 219 64882.92
## 220 64882.91
## 221 64882.91
## 222 64882.92
new.regressor7 <- data.frame(c(us$positiveIncrease, mlp.new.fore7$mean))
new.regressor7
## c.us.positiveIncrease..mlp.new.fore7.mean.
## 1 3613.00
## 2 3171.00
## 3 4671.00
## 4 6255.00
## 5 6885.00
## 6 9259.00
## 7 11442.00
## 8 10632.00
## 9 12873.00
## 10 17656.00
## 11 19044.00
## 12 19724.00
## 13 19655.00
## 14 21897.00
## 15 24694.00
## 16 25773.00
## 17 27992.00
## 18 31945.00
## 19 33252.00
## 20 25550.00
## 21 28937.00
## 22 30737.00
## 23 30534.00
## 24 34419.00
## 25 34351.00
## 26 30561.00
## 27 27844.00
## 28 25133.00
## 29 25631.00
## 30 30298.00
## 31 30923.00
## 32 31964.00
## 33 27963.00
## 34 27468.00
## 35 25872.00
## 36 26333.00
## 37 28916.00
## 38 31784.00
## 39 34174.00
## 40 35901.00
## 41 27380.00
## 42 22605.00
## 43 25219.00
## 44 26641.00
## 45 29568.00
## 46 33056.00
## 47 29185.00
## 48 25767.00
## 49 22523.00
## 50 22449.00
## 51 25056.00
## 52 27490.00
## 53 27605.00
## 54 24810.00
## 55 21504.00
## 56 18236.00
## 57 22663.00
## 58 21193.00
## 59 26705.00
## 60 24710.00
## 61 24701.00
## 62 20171.00
## 63 20992.00
## 64 20853.00
## 65 21319.00
## 66 26513.00
## 67 24555.00
## 68 21590.00
## 69 20105.00
## 70 18798.00
## 71 16629.00
## 72 19385.00
## 73 22601.00
## 74 23522.00
## 75 23762.00
## 76 21639.00
## 77 20415.00
## 78 19982.00
## 79 20312.00
## 80 20839.00
## 81 23352.00
## 82 23106.00
## 83 18823.00
## 84 17012.00
## 85 17175.00
## 86 20803.00
## 87 22032.00
## 88 23477.00
## 89 25341.00
## 90 21373.00
## 91 18510.00
## 92 23435.00
## 93 23873.00
## 94 27527.00
## 95 31046.00
## 96 31994.00
## 97 27284.00
## 98 27017.00
## 99 33021.00
## 100 38684.00
## 101 39072.00
## 102 44421.00
## 103 43783.00
## 104 41857.00
## 105 39175.00
## 106 47462.00
## 107 50674.00
## 108 53826.00
## 109 54223.00
## 110 54734.00
## 111 45789.00
## 112 41600.00
## 113 51766.00
## 114 62147.00
## 115 58836.00
## 116 66645.00
## 117 63007.00
## 118 60978.00
## 119 58465.00
## 120 62879.00
## 121 65382.00
## 122 70953.00
## 123 77233.00
## 124 65180.00
## 125 64884.00
## 126 56971.00
## 127 63642.00
## 128 69150.00
## 129 71027.00
## 130 75193.00
## 131 65413.00
## 132 61713.00
## 133 60074.86
## 134 63365.83
## 135 64993.45
## 136 66006.27
## 137 65300.01
## 138 64878.22
## 139 64482.41
b. 90 day regressor var
new.regressor90 <- data.frame(c(us$positiveIncrease, mlp.new.fore90$mean))
new.regressor90
## c.us.positiveIncrease..mlp.new.fore90.mean.
## 1 3613.00
## 2 3171.00
## 3 4671.00
## 4 6255.00
## 5 6885.00
## 6 9259.00
## 7 11442.00
## 8 10632.00
## 9 12873.00
## 10 17656.00
## 11 19044.00
## 12 19724.00
## 13 19655.00
## 14 21897.00
## 15 24694.00
## 16 25773.00
## 17 27992.00
## 18 31945.00
## 19 33252.00
## 20 25550.00
## 21 28937.00
## 22 30737.00
## 23 30534.00
## 24 34419.00
## 25 34351.00
## 26 30561.00
## 27 27844.00
## 28 25133.00
## 29 25631.00
## 30 30298.00
## 31 30923.00
## 32 31964.00
## 33 27963.00
## 34 27468.00
## 35 25872.00
## 36 26333.00
## 37 28916.00
## 38 31784.00
## 39 34174.00
## 40 35901.00
## 41 27380.00
## 42 22605.00
## 43 25219.00
## 44 26641.00
## 45 29568.00
## 46 33056.00
## 47 29185.00
## 48 25767.00
## 49 22523.00
## 50 22449.00
## 51 25056.00
## 52 27490.00
## 53 27605.00
## 54 24810.00
## 55 21504.00
## 56 18236.00
## 57 22663.00
## 58 21193.00
## 59 26705.00
## 60 24710.00
## 61 24701.00
## 62 20171.00
## 63 20992.00
## 64 20853.00
## 65 21319.00
## 66 26513.00
## 67 24555.00
## 68 21590.00
## 69 20105.00
## 70 18798.00
## 71 16629.00
## 72 19385.00
## 73 22601.00
## 74 23522.00
## 75 23762.00
## 76 21639.00
## 77 20415.00
## 78 19982.00
## 79 20312.00
## 80 20839.00
## 81 23352.00
## 82 23106.00
## 83 18823.00
## 84 17012.00
## 85 17175.00
## 86 20803.00
## 87 22032.00
## 88 23477.00
## 89 25341.00
## 90 21373.00
## 91 18510.00
## 92 23435.00
## 93 23873.00
## 94 27527.00
## 95 31046.00
## 96 31994.00
## 97 27284.00
## 98 27017.00
## 99 33021.00
## 100 38684.00
## 101 39072.00
## 102 44421.00
## 103 43783.00
## 104 41857.00
## 105 39175.00
## 106 47462.00
## 107 50674.00
## 108 53826.00
## 109 54223.00
## 110 54734.00
## 111 45789.00
## 112 41600.00
## 113 51766.00
## 114 62147.00
## 115 58836.00
## 116 66645.00
## 117 63007.00
## 118 60978.00
## 119 58465.00
## 120 62879.00
## 121 65382.00
## 122 70953.00
## 123 77233.00
## 124 65180.00
## 125 64884.00
## 126 56971.00
## 127 63642.00
## 128 69150.00
## 129 71027.00
## 130 75193.00
## 131 65413.00
## 132 61713.00
## 133 60074.86
## 134 63365.83
## 135 64993.45
## 136 66006.27
## 137 65300.01
## 138 64878.22
## 139 64482.41
## 140 64654.54
## 141 64770.89
## 142 64930.54
## 143 64913.16
## 144 64914.77
## 145 64875.07
## 146 64881.35
## 147 64871.10
## 148 64879.11
## 149 64873.34
## 150 64878.51
## 151 64879.68
## 152 64888.68
## 153 64891.00
## 154 64890.90
## 155 64883.58
## 156 64878.21
## 157 64875.23
## 158 64878.53
## 159 64883.63
## 160 64888.58
## 161 64889.10
## 162 64886.37
## 163 64881.84
## 164 64879.19
## 165 64879.10
## 166 64881.47
## 167 64884.09
## 168 64885.72
## 169 64885.44
## 170 64884.02
## 171 64882.31
## 172 64881.40
## 173 64881.46
## 174 64882.32
## 175 64883.30
## 176 64883.96
## 177 64883.96
## 178 64883.47
## 179 64882.77
## 180 64882.30
## 181 64882.24
## 182 64882.58
## 183 64883.05
## 184 64883.39
## 185 64883.42
## 186 64883.19
## 187 64882.84
## 188 64882.61
## 189 64882.60
## 190 64882.78
## 191 64883.00
## 192 64883.15
## 193 64883.15
## 194 64883.02
## 195 64882.87
## 196 64882.78
## 197 64882.79
## 198 64882.87
## 199 64882.97
## 200 64883.03
## 201 64883.02
## 202 64882.96
## 203 64882.90
## 204 64882.86
## 205 64882.87
## 206 64882.90
## 207 64882.95
## 208 64882.97
## 209 64882.97
## 210 64882.94
## 211 64882.91
## 212 64882.90
## 213 64882.90
## 214 64882.92
## 215 64882.94
## 216 64882.95
## 217 64882.95
## 218 64882.93
## 219 64882.92
## 220 64882.91
## 221 64882.91
## 222 64882.92
mlp.fit1 = mlp(tUScurrhop, xreg = tUSx, outplot = T, comb = "mean")
plot(mlp.fit1)
currhosp.fore7 = forecast(mlp.fit1, h = 7, xreg = new.regressor7)
plot(currhosp.fore7)
b. Currently Hospitalized 90 Day Forecast w/ Positive Increaser Regressor
currhosp.fore90 = forecast(mlp.fit1, h = 90, xreg = new.regressor90)
plot(currhosp.fore90)
#if (!require(devtools)) {
# install.packages("devtools")
#}
#devtools::install_github("josephsdavid/tswgewrapped", build_vignettes = TRUE)
library(tswgewrapped)
data_train.m <- data.frame(hospitalizedCurrently = us$hospitalizedCurrently[1:122], positiveIncrease = us$positiveIncrease[1:122])
data_test.m <- data.frame(hospitalizedCurrently = us$hospitalizedCurrently[123:132], positiveIncrease = us$positiveIncrease[123:132])
# search for best NN hyperparameters in given grid
model.m = tswgewrapped::ModelBuildNNforCaret$new(data = data_train.m, var_interest = "hospitalizedCurrently",
search = 'random', tuneLength = 5, parallel = TRUE,
batch_size = 50, h = 7, m = 7,
verbose = 1)
## Registered S3 method overwritten by 'lava':
## method from
## print.pcor greybox
## Aggregating results
## Selecting tuning parameters
## Fitting reps = 10, hd = 2, allow.det.season = TRUE on full training set
## - Total Time for training: : 48.936 sec elapsed
res.m <- model.m$summarize_hyperparam_results()
res.m
## reps hd allow.det.season RMSE ASE RMSESD ASESD
## 1 10 2 TRUE 3103.429 13970241 2184.689 16296534
## 2 11 3 FALSE 3129.341 14942705 2380.109 19673017
## 3 16 1 FALSE 3559.638 16480775 2047.127 15988977
## 4 16 3 TRUE 3204.752 15391189 2373.358 19509361
## 5 22 3 FALSE 3268.191 16196849 2463.200 19979406
model.m$plot_hyperparam_results()
best.m <- model.m$summarize_best_hyperparams()
best.m
## reps hd allow.det.season
## 1 10 2 TRUE
final.ase.m <- dplyr::filter(res.m, reps == best.m$reps &
hd == best.m$hd &
allow.det.season == best.m$allow.det.season)[['ASE']]
final.ase.m
## [1] 13970241
# Final Model
caret_model.m = model.m$get_final_models(subset = 'a')
caret_model.m$finalModel
## MLP fit with 2 hidden nodes and 10 repetitions.
## Series modelled in differences: D1.
## Univariate lags: (1,2,3)
## 1 regressor included.
## - Regressor 1 lags: (1,3)
## Forecast combined using the median operator.
## MSE: 1065689.5459.
# Plot Final Model
plot(caret_model.m$finalModel)